suppressPackageStartupMessages({
# Load required packages
library(SingleCellExperiment)
library(dplyr)
library(magrittr)
library(janitor)
library(tidyr)
library(stringr)
library(readxl)
library(data.table)
library(cowplot)
library(ComplexHeatmap)
library(circlize)
library(scran)
library(cutpointr)
library(glmnet)
library(ClassifyR)
library(survival)
library(survminer)
library(ggthemr)
source("benchmarkUtils.R")
})
rename <- dplyr::rename
select <- dplyr::select
# Set ggplot theme
ggthemr_reset()
ggthemr('pale')
mycolors <- c("#999999", "#0072B2", "#E69F00", "#F0E442", "#D9B3FF", "#009E73",
"#D55E00", "#5D8AA8", "#CC79A7", "#56B4E9",
"#F3B3A6", "#A5AB81", "#B2182B", "#4393C3", "#CDBE6B",
"#80CDC1", "#F4A582", "#BABABA", "#CCEBC5", "#DECBE4",
"#FDDFDF", "#B3DE69", "#FDBF6F", "#CCECE6", "#FB8072")
dl_method_pal <- c(
"TCGN" = "#CA9C91",
"THItoGene" ="#BA7FB5",
"EGNv1" = "#B3D46B",
"EGNv2" = "#F7CBDF",
"DeepPT"="#80B1D2",
"DeepSpaCE"="#F18072",
"ST-Net"="#8CD0C3",
"HisToGene"="#f1c232",
"Hist2ST"="#F9B063",
"GeneCodeR"="#BCB9D8",
"RNA-Seq"="#BABABA",
"RNA-Seq-STgenes"="#BABABA"
)
mypalette <- define_palette(
swatch = mycolors,
gradient = c(lower = mycolors[1L], upper = mycolors[3L])
)
ggthemr(mypalette) # for some reason it uses the first colour as the colour of the grid lines
theme_update(panel.grid.major = element_line(linetype="dotted"))
th <- theme(text=element_text(size=15),
axis.text.x = element_text(angle = 0, hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(colour = "black", size=0.7, fill=NA) )
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# load the dataset
tcga_rnaseq_data <- readRDS(
file="data/raw/tcga_brca/tcga_breast_matched_rnaseq_sce.rds"
)
# Get calculated metrics
pred_feat_cor <- readRDS("data/processed/her2st/her2st_pred_feat_cor_11.rds") %>%
mutate(model_id = case_when(
model_id == "genecoder_i500_j500" ~ "GeneCodeR",
TRUE ~ model_id
)) %>%
mutate(ssim = ifelse(ssim < 0, 0, ssim))
gene_cols <- pred_feat_cor %>% distinct(gene) %>% pull
# Get TNBC Patients
tcga_clinical_full <- read.delim("data/raw/tcga_brca/BRCA.clin.merged.txt", header=FALSE) %>%
t() %>%
as.data.table() %>%
row_to_names(1)
tnbc_pats <- tcga_clinical_full %>%
filter(`patient.lab_proc_her2_neu_immunohistochemistry_receptor_status`=="negative" &
patient.breast_carcinoma_estrogen_receptor_status=="negative" &
patient.breast_carcinoma_progesterone_receptor_status=="negative") %>%
pull(patient.bcr_patient_barcode) %>%
toupper()
# Calculate HV Genes
dec_tcga <- modelGeneVar(assay(tcga_rnaseq_data, "normalized_count")[which(rownames(assay(tcga_rnaseq_data, "normalized_count")) %in% gene_cols),])
# Get the top 10% of genes.
hv_genes_tcga <- getTopHVGs(dec_tcga, fdr.threshold = .05)
The following is a dataframe with the following columns:
tcga_pred_comb_dat <- read.csv('data/raw/tcga_brca/her2st_tcga_pred_comb_dat_mean.csv')
glimpse(tcga_pred_comb_dat)
## Rows: 4,947,145
## Columns: 7
## $ img_id <chr> "TCGA-4H-AAAK-01Z-00-DX1", "TCGA-5L-AAT0-01Z-00-DX1", "TCGA-5…
## $ gene <chr> "HPS6", "HPS6", "HPS6", "HPS6", "HPS6", "HPS6", "HPS6", "HPS6…
## $ pred <dbl> 0.1395431, 0.1476364, 0.1347771, 0.1544416, 0.1290884, 0.1219…
## $ model_id <chr> "HisToGene", "HisToGene", "HisToGene", "HisToGene", "HisToGen…
## $ pat_id <chr> "TCGA-4H-AAAK", "TCGA-5L-AAT0", "TCGA-5L-AAT1", "TCGA-5T-A9QA…
## $ rna_id <chr> "TCGA-4H-AAAK-01A-12R-A41B-07", "TCGA-5L-AAT0-01A-12R-A41B-07…
## $ exprs <dbl> 570.2128, 870.5089, 722.9014, 463.9121, 591.1835, 880.7978, 6…
Boxplot of patient-level correlations between predicted pseudobulk gene expression and RNA-Seq bulk gene expression over all analysed TCGA-BRCA images split by method. Models were trained on the HER2+ ST dataset, selecting the model from the 4-fold CV with the best test set performance.
tcga_pred_pat_m_df <- tcga_pred_comb_dat %>%
filter(log(exprs) > 5) %>%
mutate(pred = log(ifelse(pred < 0, 0, pred)+1)) %>%
mutate(exprs = log(exprs)) %>%
filter(!is.na(pred)) %>%
group_by(pat_id, img_id, model_id) %>%
summarise(
cor_pearson = cor(exprs, pred, method = "pearson"),
cor_spearman = cor(exprs, pred, method = "spearman"),
var_exprs = var(exprs),
var_pred = var(pred),
mean_exprs = mean(exprs),
mean_pred = mean(pred),
rmse = sqrt(sum((pred - exprs) ^ 2) / n()),
mi = calculate_MI(pred, exprs),
js_div = calculate_JS_divergence(pred, exprs),
auc_0 = calculate_AUC(pred, exprs, 0),
nrmse_range = calculate_nrmse(pred, exprs, "range"),
nrmse_sd = calculate_nrmse(pred, exprs, "sd"),
.groups = "drop"
)
tcga_pred_pat_m_df <- tcga_pred_pat_m_df%>%
arrange(factor(model_id, levels=c("ST-Net", "HisToGene", "GeneCodeR", "DeepSpaCE", "DeepPT", "Hist2ST", "EGNv1", "EGNv2", "TCGN", "THItoGene")))
tcga_pred_pat_m_df$model_id <- factor(tcga_pred_pat_m_df$model_id, levels = unique(tcga_pred_pat_m_df$model_id))
p_pat_cor <- tcga_pred_pat_m_df %>%
ggplot()+
aes(x=model_id,y=cor_pearson, col=model_id, fill=model_id)+
geom_violin(alpha=0.2)+
geom_boxplot(alpha=0.65, width=0.3)+
labs(y="PCC", x="")+
scale_color_manual(values=dl_method_pal)+
scale_fill_manual(values=dl_method_pal)+
theme(legend.position="none") +
th
p_pat_cor
Scatterplot of patient-level correlations between predicted pseudobulk gene expression and RNA-Seq bulk gene expression vs. first principal component of calculated H&E quality control metrics. H&E QC metrics were calculated on stage I breast cancer patients. Line of best fit plotted along with Pearson correlation and associated p-value from a correlation test.
tcga_her2_qc_dat <- read.delim("data/raw/qc_tcga_her2_results_first.tsv",
sep = "\t", header=TRUE, skip=5, row.names = NULL) %>%
`colnames<-`(colnames(.)[c(2:ncol(.), 1)]) %>%
dplyr::select(-comments, -row.names) %>%
rename(filename=X.dataset.filename)
tcga_her2_qc_stand <- read.delim("data/raw/qc_tcga_her2_results_standard.tsv",
sep = "\t", header=TRUE, skip=5, row.names = NULL) %>%
`colnames<-`(colnames(.)[c(2:ncol(.), 1)]) %>%
dplyr::select(-comments, -row.names) %>%
rename(filename=X.dataset.filename)
tcga_her2_qc_df <- tcga_her2_qc_dat %>%
mutate(pat_id = gsub(".tiff", "", filename)) %>%
dplyr::select(
c(
michelson_contrast ,
rms_contrast ,
grayscale_brightness,
chan1_brightness,
chan2_brightness,
chan3_brightness,
chan1_brightness_YUV,
chan2_brightness_YUV,
chan3_brightness_YUV, pat_id))
tcga_her2_qc_stand_df <- tcga_her2_qc_stand %>%
mutate(pat_id = gsub(".tiff", "", filename)) %>%
dplyr::select(c(pat_id))
merge_her2_qc <- merge(tcga_her2_qc_df, tcga_her2_qc_stand_df, by = "pat_id")
pca_res <- prcomp(merge_her2_qc %>%
select(-pat_id),
center=TRUE,
scale=TRUE)
p_pc_pat_cor <- data.frame(
PC1=pca_res$x[,"PC1"],
pat_id=merge_her2_qc %>% select(pat_id)
) %>%
left_join(
tcga_pred_pat_m_df %>%
select(pat_id, model_id, cor_pearson),
by="pat_id"
)
p_pc_pat_cor_res <- p_pc_pat_cor %>%
filter(PC1 < 4) %>%
group_by(model_id) %>%
group_modify(~{
tidy(cor.test(.x$cor_pearson, .x$PC1)) %>%
mutate(n = nrow(.x))
}) %>%
mutate(
report = sprintf(
"italic(t)~(%d)~'='~%.2f*','~italic(p)~'='~%.2g~','~italic(r)~'='~%.2f*','~'95%%~CI'~'['~%.2f*','~%.2f~']'",
n - 2, statistic, p.value, estimate, conf.low, conf.high
)
)
p_tcga_histoqc <- p_pc_pat_cor %>%
filter(PC1 < 4) %>%
ggplot() +
aes(x = PC1, y = cor_pearson) +
geom_point() +
geom_smooth(data = subset(p_pc_pat_cor, PC1 < 4),
method = "lm", formula = "y~x", alpha = .1) +
geom_text(
data = p_pc_pat_cor_res,
aes(label = report, x = Inf, y = Inf),
hjust = 1, vjust = 1.2, size = 2, parse = TRUE, col = "grey20"
) +
facet_wrap(~model_id, nrow = 1, scales = "free_y") +
labs(
x = "HistoQC features",
y = "PCC",
title = "Correlation Analysis with HistoQC Features"
) +
th
p_tcga_histoqc
Xu J, Qin S, Yi Y, Gao H, Liu X, Ma F, Guan M. Delving into the Heterogeneity of Different Breast Cancer Subtypes and the Prognostic Models Utilizing scRNA-Seq and Bulk RNA-Seq. International Journal of Molecular Sciences. 2022; 23(17):9936. https://doi.org/10.3390/ijms23179936
bc_pat_luminal <- tcga_clinical_full %>%
filter( (patient.breast_carcinoma_estrogen_receptor_status=="positive" &
patient.breast_carcinoma_progesterone_receptor_status=="positive") |
(patient.breast_carcinoma_estrogen_receptor_status=="positive" &
patient.breast_carcinoma_progesterone_receptor_status=="negative")) %>%
pull(patient.bcr_patient_barcode) %>%
toupper()
bc_pat_her2 <- tcga_clinical_full %>%
filter( `patient.lab_proc_her2_neu_immunohistochemistry_receptor_status`=="positive"
) %>%
pull(patient.bcr_patient_barcode) %>%
toupper()
bc_pat_tnbc <- tcga_clinical_full %>%
filter(`patient.lab_proc_her2_neu_immunohistochemistry_receptor_status`=="negative" &
patient.breast_carcinoma_estrogen_receptor_status=="negative" &
patient.breast_carcinoma_progesterone_receptor_status=="negative") %>%
pull(patient.bcr_patient_barcode) %>%
toupper
# Get data
# Baseline Survival Prediction using TCGA
dpnd_var_df <- colData(tcga_rnaseq_data) %>%
as.data.frame() %>%
select(vital_status, days_to_collection, days_to_last_follow_up, ajcc_pathologic_stage,
days_to_death, patient,
paper_vital_status, paper_days_to_last_followup, paper_pathologic_stage) %>%
tibble::rownames_to_column("rna_id")
# Consider only the samples in the predicted data
tcga_dl_samp_id <- tcga_pred_comb_dat %>%
distinct(rna_id) %>%
pull
tcga_samp_id <- dpnd_var_df %>%
tibble::rownames_to_column() %>%
filter(rna_id %in% (tcga_dl_samp_id)) %>%
pull(rna_id)
task_data <- bind_cols(
t(assay(tcga_rnaseq_data, "normalized_count")) %>%
as.data.frame(),
dpnd_var_df %>%
select(patient, vital_status, days_to_last_follow_up, days_to_death) %>%
mutate(days_to_last_follow_up = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death)) %>%
select(-days_to_death)
) %>%
mutate(vital_status = as.integer(as.factor(vital_status)) - 1) %>%
filter(!is.na(vital_status)) %>%
janitor::clean_names()
# Clean data by removing lowest 20% absolute exp and lowest 10% variance
# Calculate the absolute expression values for each gene
exp_sum <- task_data %>%
select(-c(patient, vital_status, days_to_last_follow_up)) %>%
apply(., 2, sum)
# Calculate variance for each gene across samples
exp_var <- task_data %>%
select(-c(patient, vital_status, days_to_last_follow_up)) %>%
apply(., 2, var)
# Determine threshold values for selecting genes
abs_threshold <- quantile(exp_sum, 0.2)
variance_threshold <- quantile(exp_var, 0.1)
# Filter genes based on criteria
task_data_p <- task_data %>%
select(
c(patient, vital_status, days_to_last_follow_up),
all_of(names(which(exp_sum > abs_threshold & exp_var > variance_threshold)))
) %>%
filter(days_to_last_follow_up > 0) %>%
mutate_at(vars(-c("patient","vital_status","days_to_last_follow_up")),
function(col) log(col+1))
model_names <- tcga_pred_comb_dat %>%
distinct(model_id) %>%
pull()
getSurvPredGEDatList <- function(pat_subset) {
lapply(model_names,
function(model_name) {
tsk_dat <- dpnd_var_df %>%
select(patient, rna_id, vital_status, days_to_last_follow_up, days_to_death) %>%
mutate(days_to_last_follow_up = ifelse(is.na(days_to_death),
days_to_last_follow_up,
days_to_death)) %>%
select(-days_to_death) %>%
filter(patient %in% pat_subset) %>%
select(-patient) %>%
left_join(
tcga_pred_comb_dat %>%
filter(model_id == model_name) %>%
select(-exprs) %>%
mutate(pred = ifelse(pred < 0, 0, pred)) %>%
mutate(pred = log(pred+1)) %>%
# do mean normalisation
group_by(pat_id, model_id) %>%
mutate(pred = pred - mean(pred)) %>%
pivot_wider(names_from = "gene",
values_from = "pred"),
by = c("rna_id" = "rna_id")
) %>%
tibble::column_to_rownames(var="rna_id") %>%
select(-c(img_id, model_id, pat_id)) %>%
mutate(vital_status = as.integer(as.factor(vital_status))-1) %>%
# Remove NA in dependent var
filter(!is.na(vital_status)) %>%
filter(!is.na(days_to_last_follow_up) & days_to_last_follow_up > 0) %>%
filter(!is.na(HPS6)) %>%
janitor::clean_names()
return(tsk_dat)
})
}
calcSurvRisk <- function(surv_data) {
# Univariate Cox regression, get p-values
# Initialize an empty data frame to store p-values
p_value_df <- data.frame(gene = character(0), p_value = numeric(0))
# Loop through each gene column
for (gene_col in colnames(surv_data)[!(colnames(surv_data) %in%
c("vital_status", "days_to_last_follow_up"))]) {
# Fit a univariate Cox regression model
cox_model <- coxph(Surv(days_to_last_follow_up, vital_status) ~ get(gene_col),
data = surv_data)
# Extract the p-value and store it in the p_values data frame
p_value <- summary(cox_model)$coefficients[, "Pr(>|z|)"]
p_value_df <- rbind(p_value_df, data.frame(gene = gene_col, p_value = p_value))
}
#p_value_df <- data.frame(p_value_df)
# Filter genes with p-value < 0.05
if (sum(p_value_df$p_value < 0.005,na.rm=T) < 5) {
sign_uni_cox_genes <- p_value_df %>%
arrange(p_value) %>%
dplyr::slice(1:10)
} else {
sign_uni_cox_genes <- p_value_df[p_value_df$p_value < 0.005, ] %>%
filter(!is.na(p_value))
}
# Prepare the input data for Lasso regression
X <- surv_data[, sign_uni_cox_genes$gene, drop = FALSE]
Y <- surv_data[, c("days_to_last_follow_up", "vital_status")]
# Fit a Lasso regression model
lasso_model <- cv.glmnet(as.matrix(X), Surv(Y$days_to_last_follow_up, Y$vital_status),
alpha = 1, family = "cox")
# Extract the selected genes based on Lasso regularization,
# get the model with coefficients >= 8
if (any(lasso_model$nzero >= 8)) {
selected_genes <- coef(lasso_model, s = lasso_model$lambda[which(lasso_model$nzero >= 9)[1]-1])
} else {
selected_genes <- coef(lasso_model, s = lasso_model$lambda[which.max(lasso_model$nzero)])
}
# Prepare the input data for multivariate Cox regression
X_selected <- X[, rownames(selected_genes)[selected_genes@i], drop = FALSE]
# Fit a multivariate Cox regression model
multivariate_cox_model <- coxph(Surv(Y$days_to_last_follow_up, Y$vital_status) ~ .,
data = X_selected)
# Calculate the risk scores for each patient
patient_risk <- exp(coef(multivariate_cox_model) %*% t(as.matrix(X_selected)))
# Calculate the median risk score
median_pat_risk <- median(patient_risk)
pat_risk_df <- patient_risk %>%
t() %>%
as.data.frame() %>%
tibble::rownames_to_column("rna_id") %>%
rename(risk=V1) %>%
mutate(risk_cat = ifelse(risk >= median_pat_risk,"high-risk","low-risk")) %>%
bind_cols(Y)
surv_obj <- Surv(pat_risk_df$days_to_last_follow_up, pat_risk_df$vital_status)
surv_fit = survfit(
Surv(days_to_last_follow_up, vital_status) ~ risk_cat,
data = pat_risk_df
)
p_km_surv <- ggsurvplot(
fit = surv_fit,
data = pat_risk_df,
risk.table = FALSE,
surv.median.line = "hv",
pval = TRUE,
# pval.method = TRUE,
pval.coord = c(0, 0.25),
conf.int = TRUE,
legend.title = "Risk Group",
xlab = "Time (years)",
ylab = "Survival Probability",
title = "Kaplan-Meier Survival Curves by Risk Group"
)
logrank_test <- survdiff(
Surv(days_to_last_follow_up, vital_status) ~
risk_cat,
data = pat_risk_df)
return(list(
p_value_df=p_value_df,
pat_risk_df=pat_risk_df,
p_km_surv=p_km_surv,
logrank_test=logrank_test,
cox_model=multivariate_cox_model
))
}
#' RNA-Seq risk models
task_data_p_luminal <- task_data_p %>%
filter(patient %in% bc_pat_luminal) %>%
select(-patient)
task_data_p_her2 <- task_data_p %>%
filter(patient %in% bc_pat_her2) %>%
select(-patient)
task_data_p_tnbc <- task_data_p %>%
filter(patient %in% bc_pat_tnbc) %>%
select(-patient)
task_data_p_luminal_stgenes <- task_data_p %>%
filter(patient %in% bc_pat_luminal) %>%
select(-patient) %>%
select(vital_status, days_to_last_follow_up, any_of(make_clean_names(gene_cols)))
task_data_p_her2_stgenes <- task_data_p %>%
filter(patient %in% bc_pat_her2) %>%
select(-patient) %>%
select(vital_status, days_to_last_follow_up, any_of(make_clean_names(gene_cols)))
task_data_p_tnbc_stgenes <- task_data_p %>%
filter(patient %in% bc_pat_tnbc) %>%
select(-patient) %>%
select(vital_status, days_to_last_follow_up, any_of(make_clean_names(gene_cols)))
#' Predicted GE risk models
# Luminal subset for predicted GE
surv_tasks_dat_list_luminal <- getSurvPredGEDatList(pat_subset=bc_pat_luminal)
surv_tasks_dat_list_her2 <- getSurvPredGEDatList(pat_subset=bc_pat_her2)
surv_tasks_dat_list_tnbc <- getSurvPredGEDatList(pat_subset=bc_pat_tnbc)
surv_dat_list <- c(
list(task_data_p_luminal, task_data_p_luminal_stgenes),
surv_tasks_dat_list_luminal,
list(task_data_p_her2, task_data_p_her2_stgenes),
surv_tasks_dat_list_her2,
list(task_data_p_tnbc,
task_data_p_tnbc_stgenes),
surv_tasks_dat_list_tnbc
) %>%
`names<-`(
c("RNA-Seq_luminal",
"RNA-Seq-STgenes_luminal",
sprintf("%s_luminal", model_names),
"RNA-Seq_her2",
"RNA-Seq-STgenes_her2",
sprintf("%s_her2", model_names),
"RNA-Seq_tnbc",
"RNA-Seq-STgenes_tnbc",
sprintf("%s_tnbc", model_names))
)
pred_ge_surv_risk_luminal <- lapply(
surv_tasks_dat_list_luminal,
calcSurvRisk
) %>%
`names<-`(model_names)
pred_ge_surv_risk_her2 <- lapply(
seq_along(surv_tasks_dat_list_her2),
function(i) {
calcSurvRisk(surv_data=surv_tasks_dat_list_her2[[i]])
}) %>%
`names<-`(model_names)
pred_ge_surv_risk_tnbc <- lapply(
seq_along(surv_tasks_dat_list_tnbc),
function(i) {
calcSurvRisk(surv_tasks_dat_list_tnbc[[i]])
}) %>%
`names<-`(model_names)
gt_ge_surv_risk_luminal <- calcSurvRisk(task_data_p_luminal)
gt_ge_surv_risk_her2 <- calcSurvRisk(task_data_p_her2)
gt_ge_surv_risk_tnbc <- calcSurvRisk(task_data_p_tnbc)
gt_ge_stgenes_surv_risk_luminal <- calcSurvRisk(task_data_p_luminal_stgenes)
gt_ge_stgenes_surv_risk_her2 <- calcSurvRisk(task_data_p_her2_stgenes)
gt_ge_stgenes_surv_risk_tnbc <- calcSurvRisk(task_data_p_tnbc_stgenes)
surv_risk_list <- c(
list(gt_ge_surv_risk_luminal),
list(gt_ge_stgenes_surv_risk_luminal),
pred_ge_surv_risk_luminal,
list(gt_ge_surv_risk_her2),
list(gt_ge_stgenes_surv_risk_her2),
pred_ge_surv_risk_her2,
list(gt_ge_surv_risk_tnbc),
list(gt_ge_stgenes_surv_risk_tnbc),
pred_ge_surv_risk_tnbc
) %>%
`names<-`(
c("RNA-Seq_luminal",
"RNA-Seq-STgenes_luminal",
sprintf("%s_luminal", model_names),
"RNA-Seq_her2",
"RNA-Seq-STgenes_her2",
sprintf("%s_her2", model_names),
"RNA-Seq_tnbc",
"RNA-Seq-STgenes_tnbc",
sprintf("%s_tnbc", model_names))
)
saveRDS(surv_risk_list,
file="data/raw/tcga_brca/tcga_pred_ge_surv_risk_list_logrna_meannorm.rds")
surv_risk_list <- readRDS(file="data/raw/tcga_brca/tcga_pred_ge_surv_risk_list_logrna_meannorm.rds")
Boxplot of gene expression values after transformation for each sample from the TNBC & HER2 subset of the TCGA data and for all genes that were measured in the HER2+ spatial transcriptomics dataset.
# Plot boxplot of all genes
surv_risk_top10feat_data <- lapply(
seq_along(surv_dat_list),
function(i) {
surv_dat_list[[i]] %>%
tibble::rownames_to_column("rna_id") %>%
mutate(task_id = names(surv_dat_list)[i])
}) %>%
bind_rows() %>%
pivot_longer(cols=!c("rna_id","task_id",
"vital_status","days_to_last_follow_up"),
names_to="gene",
values_to="exprs") %>%
filter(!is.na(exprs)) %>%
separate(task_id, c("data","subgroup"), sep="_")
supp_fig_surv_dat_tnbc <- surv_risk_top10feat_data %>%
filter(subgroup=="tnbc") %>%
ggplot()+
aes(x=rna_id,y=exprs)+
geom_boxplot()+
facet_wrap(~data, scales="free")+
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
labs(title="Boxplots of all genes in each data",
subtitle="TNBC BC Subset")
supp_fig_surv_dat_tnbc
supp_fig_surv_dat_her2 <- surv_risk_top10feat_data %>%
filter(subgroup=="her2") %>%
ggplot()+
aes(x=rna_id,y=exprs)+
geom_boxplot()+
facet_wrap(~data, scales="free")+
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
labs(title="Boxplots of all genes in each data",
subtitle="HER2 BC Subset")
supp_fig_surv_dat_her2
set.seed(12)
system.time({
surv_risk_classifyr_list_top5feat_cv_3fold100rep <- lapply(
seq_along(surv_dat_list),
function(i) {
print(i)
survCrossValidated <- crossValidate(
surv_dat_list[[i]],
c("days_to_last_follow_up", "vital_status"),
nFeatures = c(5),
nFolds = 3, nRepeats = 100,
classifier="CoxPH",
selectionMethod = "CoxPH",
nCores = 40)
}) %>%
`names<-`(names(surv_dat_list))
})
saveRDS(surv_risk_classifyr_list_top5feat_cv_3fold100rep,
file="data/raw/tcga_brca/final_surv_risk_classifyr_list_top5feat_cv_3fold100rep.rds")
surv_risk_classifyr_list_top5feat_cv_3fold100rep <- readRDS(
file="data/raw/tcga_brca/final_surv_risk_classifyr_list_top5feat_cv_3fold100rep.rds")
surv_eval_dat <- surv_risk_classifyr_list_top5feat_cv_3fold100rep
surv_risk_classifyr_cindex <- lapply(
seq_along(surv_eval_dat),
function(i) {
surv_eval_dat[[i]]@predictions %>%
as.data.frame() %>%
left_join(
data.frame(
sample = surv_eval_dat[[i]]@originalNames,
truth = surv_eval_dat[[i]]@actualOutcome
),
by=c("sample")
) %>%
mutate(subset_name = names(surv_eval_dat)[i])
}
) %>%
bind_rows() %>%
left_join(
colData(tcga_rnaseq_data) %>%
as.data.frame() %>%
select(barcode, paper_pathologic_stage),
by=c("sample"="barcode")
) %>%
mutate(time=as.numeric(gsub("\\+","",truth)),
event=ifelse(grepl("\\+",truth),0,1)) %>%
group_by(subset_name,permutation,fold) %>%
group_modify(~{
concordance(
truth~risk,
data=.x,
reverse=TRUE)$concordance %>%
as.data.frame() %>%
rename("cindex"=".")
}) %>%
group_by(subset_name) %>%
mutate(cindex_avg=mean(cindex))
C-indices of multivariate cox regression models predicting survival of TCGA-BRCA patients, using RNA-Seq bulk, RNA-Seq bulk using only genes present in HER2+ ST dataset, and the predicted pseudobulk from each method. C-indices were calculated from the test sets of a 3-fold CV with 100 repeats trained within HER2+, luminal and TNBC breast cancer clinical subtypes
# Plot cindex
p_cind_boxplot <- surv_risk_classifyr_cindex %>%
separate_wider_delim("subset_name", "_", names=c("data","subgroup")) %>%
group_by(data, permutation, subgroup) %>%
summarise(cindex = mean(cindex, na.rm=T), .groups="keep") %>%
group_by(data) %>%
mutate(cindex_avg = mean(cindex)) %>%
ungroup() %>%
left_join(
surv_risk_classifyr_cindex %>%
separate_wider_delim("subset_name", "_", names=c("data","subgroup"))%>%
filter(!(data %in% c("RNA-Seq","RNA-Seq-STgenes"))) %>%
group_by(data, subgroup) %>%
summarise(cindex=mean(cindex,na.rm=T), .groups="drop") %>%
group_by(subgroup) %>%
mutate(max_ind = ifelse(cindex == max(cindex), "*","")) %>%
select(data, subgroup, max_ind),
by=c("data"="data","subgroup"="subgroup")
) %>%
ungroup() %>%
mutate(data = factor(data, levels = c("THItoGene", "TCGN", "EGNv2","EGNv1", "Hist2ST", "DeepPT", "DeepSpaCE", "GeneCodeR", "HisToGene", "ST-Net", "RNA-Seq" ,"RNA-Seq-STgenes"))) %>%
mutate(data = as.factor(data)) %>%
mutate(subgroup=case_when(
subgroup=="her2" ~ "HER2+",
subgroup=="tnbc" ~ "TNBC",
subgroup=="luminal" ~ "Luminal",
TRUE~ ""
)) %>%
ggplot() +
aes(x=data,y=cindex, colour=data, fill=data)+
geom_boxplot(alpha=.5) +
facet_wrap(~subgroup, nrow=2, scales="free_x")+
# scale_y_continuous(limits = c(0, NA), expand = c(0,0))+
scale_fill_manual(values=dl_method_pal)+
scale_colour_manual(values=dl_method_pal)+
coord_flip()+
labs(#title="C-index over each BC subgroup",
#subtitle="Models fit using top 10 genes within each data subset",
x="",y="C-index")+
th +
theme(legend.position = "none")
p_cind_boxplot
Kaplan-Meier curves constructed using the average risk prediction from a 3-fold CV with 100 repeats within the HER2+ breast cancer subtype.
avg_risk_list <- lapply(
seq_along(surv_eval_dat),
function(i) {
surv_eval_dat[[i]]@predictions %>%
as.data.frame() %>%
left_join(
data.frame(
sample = surv_eval_dat[[i]]@originalNames,
truth = surv_eval_dat[[i]]@actualOutcome
),
by=c("sample")
) %>%
mutate(subset_name = names(surv_eval_dat)[i])
}
) %>%
bind_rows() %>%
mutate(truth = as.character(truth)) %>%
mutate(time=as.numeric(gsub("\\+","",truth)),
event=ifelse(grepl("\\+",truth),0,1)) %>%
group_by(subset_name, sample, truth, time, event) %>%
summarise(avg_risk = mean(risk), .groups="drop") %>%
group_by(subset_name) %>%
mutate(risk_cat = ifelse(avg_risk >= median(avg_risk),"high-risk","low-risk")) %>%
split(., f=.$subset_name)
km_list <- lapply(
seq_along(avg_risk_list),
function(i) {
surv_fit = survfit(Surv(time, event) ~ risk_cat,
data = avg_risk_list[[i]])
p_km_surv <- ggsurvplot(
fit = surv_fit,
data = avg_risk_list[[i]],
risk.table = FALSE,
pval = TRUE,
pval.size = 4,
legend.title = "Risk Group",
xlab = "Time (days)",
ylab = "Survival Probability",
title = "Kaplan-Meier Survival Curves by Risk Group"
)
# calculate logrank pvalue
p_value <- surv_pvalue(surv_fit(
Surv(time, event) ~ risk_cat,
data = avg_risk_list[[i]]
))
logrank_test <- survdiff(Surv(time, event) ~
risk_cat,
data = avg_risk_list[[i]])
return(
p_km_surv$data.survplot %>%
mutate(pval.txt = p_value$pval.txt) %>%
mutate(pval = p_value$pval) %>%
mutate(subgroup = names(avg_risk_list)[i])
)
}
)
# extract plot data from ggsurvplot
km_surv_plot_dat_df <- km_list %>%
bind_rows() %>%
separate(subgroup, c("data", "subgroup"), sep = "_") %>%
mutate(
subgroup = case_when(
subgroup == "her2" ~ "HER2+",
subgroup == "tnbc" ~ "TNBC",
subgroup == "luminal" ~ "Luminal",
TRUE ~ ""
) %>%
factor(., levels = c("HER2+", "TNBC", "Luminal"))
)
th <- theme(text=element_text(size=16),
axis.text.x = element_text(angle = 0, hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(colour = "black", size=0.7, fill=NA) )
fig_4i_cv <- km_surv_plot_dat_df %>%
filter(subgroup=="HER2+" & data!="RNA-Seq") %>%
mutate(
data = factor(data,
levels=c("RNA-Seq-STgenes",
"ST-Net",
"HisToGene",
"GeneCodeR",
"DeepSpaCE",
"DeepPT",
"Hist2ST",
"EGNv1",
"EGNv2",
"TCGN",
"THItoGene")
)
) %>%
ggplot(aes(x=time/365,y=surv, col=risk_cat))+
geom_step(linewidth = 1, linetype = 1)+
geom_point(size = 4.5, shape = "+")+
geom_text(aes(label=pval.txt, x=-Inf,y=-Inf),
vjust=-1,hjust=-.2,
colour="grey60", check_overlap = TRUE)+
scale_y_continuous(limits = c(0, 1.05), expand=c(0,0))+
scale_x_continuous(expand=c(0,0))+
ggh4x::facet_grid2(rows=vars(subgroup),cols=vars(data),
scales="free", independent = "all",switch="y")+
theme(legend.position="none",
strip.placement = "outside",
strip.switch.pad.grid = unit(1, "cm"),
axis.title.y = element_text(vjust = -15))+
th+
labs(x="Time (years)",y="Survival probability",col="Risk Category")
fig_4i_cv
## Warning: The `scale_name` argument of `discrete_scale()` is deprecated as of ggplot2
## 3.5.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fig_4e_cv_tnbc_luminal <- km_surv_plot_dat_df %>%
filter(subgroup!="HER2+" & data!="RNA-Seq") %>%
mutate(
data = factor(data,
levels=c("RNA-Seq-STgenes",
"ST-Net",
"HisToGene",
"GeneCodeR",
"DeepSpaCE",
"DeepPT",
"Hist2ST",
"EGNv1",
"EGNv2",
"TCGN",
"THItoGene")
)
) %>%
ggplot(aes(x=time/365,y=surv, col=risk_cat))+
geom_step(linewidth = 1, linetype = 1)+
geom_point(size = 4.5, shape = "+")+
geom_text(aes(label=pval.txt, x=-Inf,y=-Inf),
vjust=-1,hjust=-.2,
colour="grey60", check_overlap = TRUE)+
scale_y_continuous(limits = c(0, 1.05), expand=c(0,0))+
scale_x_continuous(expand=c(0,0))+
ggh4x::facet_grid2(rows=vars(subgroup),cols=vars(data),
scales="free", independent = "all",switch="y")+
theme(legend.position="bottom",
strip.placement = "outside",
strip.switch.pad.grid = unit(1, "cm"),
axis.title.y = element_text(vjust = -15))+
th +
labs(x="Time (years)",y="Survival probability",col="Risk Category")
fig_4e_cv_tnbc_luminal
Kaplan-Meier curves for patients split into high and low risk groups by the median risk prediction of the multivariate cox regression models for each method and (f) HER2+, (g) luminal and (h) TNBC breast cancer subtypes. Models were trained on all patients and then the predictions of the training patients were used. The p-value represents the result of the logrank test for assessing the statistical significance of differences in survival between the groups.
calcSurvRisk2 <- function(surv_data, selected_genes) {
# Prepare the input data for Lasso regression
X <- surv_data[, selected_genes, drop = FALSE]
Y <- surv_data[, c("days_to_last_follow_up", "vital_status")]
# Fit a multivariate Cox regression model
multivariate_cox_model <- coxph(Surv(Y$days_to_last_follow_up, Y$vital_status) ~ .,
data = X)
# Calculate the risk scores for each patient
patient_risk <- coef(multivariate_cox_model) %*% t(as.matrix(X))
# Calculate the median risk score
median_pat_risk <- median(patient_risk)
pat_risk_df <- patient_risk %>%
t() %>%
as.data.frame() %>%
tibble::rownames_to_column("rna_id") %>%
rename(risk=V1) %>%
mutate(risk_cat = ifelse(risk >= median_pat_risk,"high-risk","low-risk")) %>%
bind_cols(Y)
surv_obj <- Surv(pat_risk_df$days_to_last_follow_up, pat_risk_df$vital_status)
surv_fit = survfit(
Surv(days_to_last_follow_up, vital_status) ~ risk_cat,
data = pat_risk_df
)
p_km_surv <- ggsurvplot(
fit = surv_fit,
data = pat_risk_df,
risk.table = FALSE,
pval = TRUE,
pval.size=4,
legend.title = "Risk Group",
xlab = "Time (years)",
ylab = "Survival Probability",
title = "Kaplan-Meier Survival Curves by Risk Group"
)
logrank_test <- survdiff(
Surv(days_to_last_follow_up, vital_status) ~
risk_cat,
data = pat_risk_df)
return(list(
pat_risk_df=pat_risk_df,
p_km_surv=p_km_surv,
logrank_test=logrank_test,
cox_model=multivariate_cox_model
))
}
surv_risk_list_top5feat <- lapply(
seq_along(surv_dat_list),
function(i) {
calcSurvRisk2(
surv_data=surv_dat_list[[i]],
selected_genes=surv_risk_list[[i]]$p_value_df %>%
arrange(p_value) %>%
slice(1:5) %>%
pull(gene)
)
}) %>%
`names<-`(names(surv_dat_list))
# Plot survival curves
km_surv_plot_dat_df <- lapply(which(!(names(surv_risk_list_top5feat) %in% c("RNA-Seq_luminal",
"RNA-Seq_her2",
"RNA-Seq_tnbc"))),
function(i) {
# calculate logrank pvalue
p_value <- surv_pvalue(
surv_fit(
Surv(days_to_last_follow_up, vital_status) ~ risk_cat,
data = surv_risk_list_top5feat[[i]]$pat_risk_df
)
)
# extract plot data from ggsurvplot
surv_risk_list_top5feat[[i]]$p_km_surv$data.survplot %>%
mutate(pval.txt = sprintf("p = %.2g", p_value$pval)) %>%
mutate(pval=p_value$pval) %>%
mutate(subgroup = names(surv_risk_list_top5feat)[i])
}) %>%
bind_rows() %>%
separate(subgroup, c("data","subgroup"), sep="_") %>%
mutate(subgroup=case_when(
subgroup=="her2" ~ "HER2+",
subgroup=="tnbc" ~ "TNBC",
subgroup=="luminal" ~ "Luminal",
TRUE~ ""
) %>%
factor(., levels=c("HER2+","TNBC","Luminal")))
fig_4f_h <- km_surv_plot_dat_df %>%
mutate(
data = factor(data,
levels=c("RNA-Seq-STgenes", km_surv_plot_dat_df %>%
distinct(pval,data,subgroup) %>%
filter(data != "RNA-Seq-STgenes") %>%
group_by(data) %>%
summarise(mean_pval = mean(pval)) %>%
arrange(mean_pval) %>%
pull(data)))
) %>%
mutate(
data = factor(data,
levels=c("RNA-Seq-STgenes",
"ST-Net",
"HisToGene",
"GeneCodeR",
"DeepSpaCE",
"DeepPT",
"Hist2ST",
"EGNv1",
"EGNv2",
"TCGN",
"THItoGene")
)
) %>%
ggplot(aes(x=time/365,y=surv, col=risk_cat))+
geom_step(linewidth = 1, linetype = 1)+
geom_point(size = 4.5, shape = "+")+
geom_text(aes(label=pval.txt, x=-Inf,y=-Inf),
vjust=-1,hjust=-.2,
colour="grey60", check_overlap = TRUE)+
scale_y_continuous(limits = c(0, 1.05), expand=c(0,0))+
scale_x_continuous(expand=c(0,0))+
ggh4x::facet_grid2(rows=vars(subgroup),cols=vars(data),
scales="free", independent = "all",switch="y")+
theme(legend.position="bottom",
strip.placement = "outside",
strip.switch.pad.grid = unit(1, "cm"),
axis.title.y = element_text(vjust = -15))+
th+
labs(x="Time (years)",y="Survival probability",col="Risk Category")
fig_4f_h
#old version c-index
risk_cat_p_df_top5feat <- lapply(
seq_along(surv_risk_list_top5feat),
function(i) {
data.frame(
task_id = names(surv_risk_list_top5feat)[i],
logrank_p = surv_risk_list_top5feat[[i]]$logrank_test$pvalue,
cindex = survival::concordance(surv_risk_list_top5feat[[i]]$cox_model)$concordance
)
}
) %>%
bind_rows() %>%
separate(task_id, c("data","subgroup"), sep="_")
risk_cat_p_df_top5feat <- risk_cat_p_df_top5feat%>%
arrange(factor(data, levels=c("proposed", "THItoGene", "TCGN", "EGNv2", "EGNv1", "Hist2ST", "DeepPT", "eepSpaCE", "GeneCodeR","HisToGene","ST-Net", "RNA-Seq-STgenes", "RNA-Seq")))
risk_cat_p_df_top5feat$data <- factor(risk_cat_p_df_top5feat$data, levels = unique(risk_cat_p_df_top5feat$data))
# Dottlot of c-index of coxph fits
fig_4bcd <- risk_cat_p_df_top5feat %>%
group_by(data) %>%
mutate(mean_cindex = mean(cindex)) %>%
ungroup() %>%
left_join(
risk_cat_p_df_top5feat %>%
filter(!(data %in% c("RNA-Seq","RNA-Seq-STgenes"))) %>%
group_by(subgroup) %>%
mutate(max_ind = ifelse(cindex == max(cindex), "*","")) %>%
select(data, subgroup, max_ind),
by=c("data"="data","subgroup"="subgroup")
) %>%
mutate(data = as.factor(data)) %>%
mutate(subgroup=case_when(
subgroup=="her2" ~ "HER2+",
subgroup=="tnbc" ~ "TNBC",
subgroup=="luminal" ~ "Luminal",
TRUE~ ""
)) %>%
ggplot() +
aes(x=data,y=cindex, colour=data, fill=data,
group=subgroup)+
geom_bar(stat="identity", alpha=.65,width=.85)+
geom_text(aes(label = max_ind),
hjust=1.75, vjust = 0.78, size=5,
colour = "white")+
facet_wrap(~subgroup, nrow=2, scales="free_x")+
scale_y_continuous(limits = c(0, NA), expand = c(0,0))+
scale_fill_manual(values=dl_method_pal)+
scale_colour_manual(values=dl_method_pal)+
coord_flip()+
labs(x="",y="C-index")+
theme(legend.position = "none")
fig_4bcd
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_text()`).
# Average each metric over each method and rank
pred_rank_gene_df <- tcga_pred_m_df %>%
ungroup() %>%
# Inf values in NRMSE, remove them
mutate_at(vars(c(nrmse_range, nrmse_sd)),
function(col) ifelse(col == Inf, NaN, col)) %>%
# Average over each model
group_by(model_id) %>%
summarise_if(is.numeric, function(col) mean(col,na.rm = T)) %>%
mutate(cor_pearson_gene_r = rank(-cor_pearson),
nrmse_gene_r = rank(nrmse_range),
js_div_gene_r = rank(js_div),
mi_gene_r = rank(-mi)) %>%
rowwise() %>%
mutate(mean_gene_rank = mean(c(cor_pearson_gene_r, nrmse_gene_r, js_div_gene_r, mi_gene_r))) %>%
ungroup() %>%
arrange(mean_gene_rank) %>%
mutate(model_id = factor(model_id, levels=unique(model_id))) %>%
pivot_longer(
cols=c("cor_pearson_gene_r", "nrmse_gene_r",
"js_div_gene_r", "mi_gene_r", "mean_gene_rank"),
names_to="metric_rank",
values_to = "rank"
) %>%
mutate(metric_rank = factor(metric_rank,
levels=c("cor_pearson_gene_r", "nrmse_gene_r",
"js_div_gene_r", "mi_gene_r", "mean_gene_rank")))
pred_rank_pat_df <- tcga_pred_pat_m_df %>%
ungroup() %>%
# Inf values in NRMSE, remove them
mutate_at(vars(c(nrmse_range, nrmse_sd)),
function(col) ifelse(col == Inf, NaN, col)) %>%
# Average over each model
group_by(model_id) %>%
summarise_if(is.numeric, function(col) mean(col,na.rm = T)) %>%
mutate(cor_pearson_pat_r = rank(-cor_pearson),
nrmse_pat_r = rank(nrmse_range),
js_div_pat_r = rank(js_div),
mi_pat_r = rank(-mi)) %>%
rowwise() %>%
mutate(mean_pat_rank = mean(c(cor_pearson_pat_r, nrmse_pat_r, js_div_pat_r, mi_pat_r))) %>%
ungroup() %>%
arrange(mean_pat_rank) %>%
mutate(model_id = factor(model_id, levels=unique(model_id))) %>%
pivot_longer(
cols=c("cor_pearson_pat_r", "nrmse_pat_r",
"js_div_pat_r", "mi_pat_r", "mean_pat_rank"),
names_to="metric_rank",
values_to = "rank"
) %>%
mutate(metric_rank = factor(metric_rank,
levels=c("cor_pearson_pat_r", "nrmse_pat_r",
"js_div_pat_r", "mi_pat_r", "mean_pat_rank")))
pred_rank_df <- pred_rank_pat_df %>%
select(model_id, metric_rank, rank) %>%
bind_rows(
pred_rank_gene_df %>%
select(model_id, metric_rank, rank)
)
saveRDS(pred_rank_df,
file="data/processed/her2st/pred_rank_df_tcgabrca.rds")
surv_cv_avg_df <- surv_risk_classifyr_cindex %>%
separate_wider_delim("subset_name", "_", names=c("data","subgroup")) %>%
filter(!(data %in% c("RNA-Seq","RNA-Seq-STgenes"))) %>%
group_by(data, permutation, subgroup) %>%
summarise(cindex = mean(cindex, na.rm=T), .groups="drop") %>%
group_by(data, subgroup) %>%
summarise(cindex = mean(cindex), .groups="drop") %>%
group_by(data) %>%
summarise(cindex = mean(cindex), .groups="drop") %>%
mutate(cindex_cv_r = rank(-cindex)) %>%
left_join(
km_surv_plot_dat_df %>%
filter(!(data %in% c("RNA-Seq","RNA-Seq-STgenes"))) %>%
distinct(pval, data, subgroup) %>%
group_by(data) %>%
summarise(surv_diff_cv = mean(pval)) %>%
mutate(surv_diff_cv_r = rank(surv_diff_cv)),
by="data"
) %>%
select(data, cindex_cv_r, surv_diff_cv_r)
clin_trans_rank_df <- risk_cat_p_df_top5feat %>%
filter(!(data %in% c("RNA-Seq","RNA-Seq-STgenes"))) %>%
group_by(data) %>%
summarise(cindex = mean(cindex),
pval = mean(logrank_p)) %>%
mutate(cindex_r = rank(-cindex),
surv_diff_r = rank(pval)) %>%
select(-cindex, -pval) %>%
left_join(
surv_cv_avg_df,
by="data"
) %>%
rowwise() %>%
mutate(mean_clin_r = mean(c(cindex_r, surv_diff_r,
cindex_cv_r, surv_diff_cv_r))) %>%
ungroup() %>%
pivot_longer(cols=!c("data"),
values_to="rank", names_to="metric_rank")
saveRDS(clin_trans_rank_df,
file="./data/processed/her2st/pred_rank_df_tcga_clinicaltrans.rds")
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Debian GNU/Linux 12 (bookworm)
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.21.so; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Sydney
## tzcode source: system (glibc)
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] pROC_1.18.5 infotheo_1.2.0.1
## [3] ggthemr_1.1.0 survminer_0.5.0
## [5] ggpubr_0.6.0 ggplot2_3.5.1
## [7] ClassifyR_3.8.5 survival_3.7-0
## [9] BiocParallel_1.38.0 MultiAssayExperiment_1.30.3
## [11] generics_0.1.3 glmnet_4.1-8
## [13] Matrix_1.7-0 cutpointr_1.2.0
## [15] scran_1.32.0 scuttle_1.14.0
## [17] circlize_0.4.16 ComplexHeatmap_2.20.0
## [19] cowplot_1.1.3 data.table_1.16.4
## [21] readxl_1.4.3 stringr_1.5.1
## [23] tidyr_1.3.1 janitor_2.2.0
## [25] magrittr_2.0.3 dplyr_1.1.4
## [27] SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
## [29] Biobase_2.64.0 GenomicRanges_1.56.0
## [31] GenomeInfoDb_1.40.1 IRanges_2.38.0
## [33] S4Vectors_0.42.0 BiocGenerics_0.50.0
## [35] MatrixGenerics_1.16.0 matrixStats_1.3.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.16.0
## [3] jsonlite_1.8.8 shape_1.4.6.1
## [5] farver_2.1.2 rmarkdown_2.27
## [7] GlobalOptions_0.1.2 zlibbioc_1.50.0
## [9] vctrs_0.6.5 DelayedMatrixStats_1.26.0
## [11] rstatix_0.7.2 htmltools_0.5.8.1
## [13] S4Arrays_1.4.1 BiocNeighbors_1.22.0
## [15] broom_1.0.7 cellranger_1.1.0
## [17] SparseArray_1.4.8 Formula_1.2-5
## [19] sass_0.4.9 bslib_0.7.0
## [21] plyr_1.8.9 zoo_1.8-12
## [23] lubridate_1.9.3 cachem_1.1.0
## [25] igraph_2.0.3 lifecycle_1.0.4
## [27] iterators_1.0.14 pkgconfig_2.0.3
## [29] rsvd_1.0.5 R6_2.5.1
## [31] fastmap_1.2.0 GenomeInfoDbData_1.2.12
## [33] snakecase_0.11.1 clue_0.3-65
## [35] digest_0.6.35 colorspace_2.1-0
## [37] dqrng_0.4.0 irlba_2.3.5.1
## [39] beachmat_2.20.0 labeling_0.4.3
## [41] philentropy_0.8.0 fansi_1.0.6
## [43] km.ci_0.5-6 timechange_0.3.0
## [45] mgcv_1.9-1 httr_1.4.7
## [47] abind_1.4-5 compiler_4.4.1
## [49] withr_3.0.0 doParallel_1.0.17
## [51] backports_1.5.0 carData_3.0-5
## [53] highr_0.11 ggupset_0.4.0
## [55] ggsignif_0.6.4 DelayedArray_0.30.1
## [57] rjson_0.2.21 bluster_1.14.0
## [59] tools_4.4.1 glue_1.7.0
## [61] nlme_3.1-165 cluster_2.1.6
## [63] reshape2_1.4.4 gtable_0.3.5
## [65] KMsurv_0.1-5 BiocSingular_1.20.0
## [67] ScaledMatrix_1.12.0 metapod_1.12.0
## [69] car_3.1-3 utf8_1.2.4
## [71] XVector_0.44.0 foreach_1.5.2
## [73] pillar_1.9.0 limma_3.60.2
## [75] splines_4.4.1 lattice_0.22-6
## [77] tidyselect_1.2.1 locfit_1.5-9.9
## [79] knitr_1.46 gridExtra_2.3
## [81] edgeR_4.2.0 xfun_0.44
## [83] statmod_1.5.0 stringi_1.8.4
## [85] UCSC.utils_1.0.0 yaml_2.3.8
## [87] evaluate_0.23 codetools_0.2-20
## [89] tibble_3.2.1 cli_3.6.2
## [91] xtable_1.8-4 munsell_0.5.1
## [93] jquerylib_0.1.4 survMisc_0.5.6
## [95] Rcpp_1.0.12 png_0.1-8
## [97] parallel_4.4.1 ggh4x_0.3.0
## [99] sparseMatrixStats_1.16.0 scales_1.3.0
## [101] purrr_1.0.2 crayon_1.5.2
## [103] GetoptLong_1.0.5 rlang_1.1.3